// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "net/base/data_url.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"

namespace net {

namespace {

    // One row of the table-driven DataURLTest.Parse test: an input URL together
    // with the results DataURL::Parse() is expected to produce for it.
    // Rows are written as positional aggregate initializers, so the field order
    // below must not change.
    struct ParseTestData {
        // Input URL text; the test wraps it in a GURL before parsing.
        const char* url;
        // Expected return value of DataURL::Parse() for |url|.
        bool is_valid;
        // Expected parsed MIME type. Only compared when |is_valid| is true.
        const char* mime_type;
        // Expected parsed charset. Only compared when |is_valid| is true.
        const char* charset;
        // Expected decoded payload. Only compared when |is_valid| is true.
        const char* data;
    };

} // namespace

// Table-driven test for DataURL::Parse(). Each row supplies a URL and the
// expected outcome: whether parsing succeeds and, if it does, the resulting
// MIME type, charset and decoded data. The expected output fields are ignored
// for rows where |is_valid| is false.
TEST(DataURLTest, Parse)
{
    const ParseTestData tests[] = {
        { "data:",
            false,
            "",
            "",
            "" },

        { "data:,",
            true,
            "text/plain",
            "US-ASCII",
            "" },

        { "data:;base64,",
            true,
            "text/plain",
            "US-ASCII",
            "" },

        { "data:;charset=,test",
            false,
            "",
            "",
            "" },

        { "data:TeXt/HtMl,<b>x</b>",
            true,
            "text/html",
            "US-ASCII",
            "<b>x</b>" },

        { "data:,foo",
            true,
            "text/plain",
            "US-ASCII",
            "foo" },

        { "data:;base64,aGVsbG8gd29ybGQ=",
            true,
            "text/plain",
            "US-ASCII",
            "hello world" },

        // Allow invalid mediatype for backward compatibility but set mime_type to
        // "text/plain" instead of the invalid mediatype.
        { "data:foo,boo",
            true,
            "text/plain",
            "US-ASCII",
            "boo" },

        // When accepting an invalid mediatype, override charset with "US-ASCII"
        { "data:foo;charset=UTF-8,boo",
            true,
            "text/plain",
            "US-ASCII",
            "boo" },

        // Invalid mediatype. Includes a slash but the type part is not a token.
        { "data:f(oo/bar;baz=1;charset=kk,boo",
            true,
            "text/plain",
            "US-ASCII",
            "boo" },

        { "data:foo/bar;baz=1;charset=kk,boo",
            true,
            "foo/bar",
            "kk",
            "boo" },

        { "data:foo/bar;charset=kk;baz=1,boo",
            true,
            "foo/bar",
            "kk",
            "boo" },

        { "data:text/html,%3Chtml%3E%3Cbody%3E%3Cb%3Ehello%20world"
          "%3C%2Fb%3E%3C%2Fbody%3E%3C%2Fhtml%3E",
            true,
            "text/html",
            "US-ASCII",
            "<html><body><b>hello world</b></body></html>" },

        { "data:text/html,<html><body><b>hello world</b></body></html>",
            true,
            "text/html",
            "US-ASCII",
            "<html><body><b>hello world</b></body></html>" },

        // the comma cannot be url-escaped!
        { "data:%2Cblah",
            false,
            "",
            "",
            "" },

        // invalid base64 content
        { "data:;base64,aGVs_-_-",
            false,
            "",
            "",
            "" },

        // Spaces should be removed from non-text data URLs (we already tested
        // spaces above).
        // The URL must carry the full "data:;base64," header and the complete
        // base64 text ("aGVsbG8gd29ybGQ=" with spaces inserted) so that it
        // decodes to the expected "hello world".
        { "data:;base64,aGVs bG8gd2  9ybGQ=",
            true,
            "text/plain",
            "US-ASCII",
            "hello world" },

        // Other whitespace should also be removed from anything base-64 encoded.
        { "data:;base64,aGVs bG8gd2  \n9ybGQ=",
            true,
            "text/plain",
            "US-ASCII",
            "hello world" },

        // In base64 encoding, escaped whitespace should be stripped.
        // (This test was taken from acid3)
        // http://b/1054495
        { "data:text/javascript;base64,%20ZD%20Qg%0D%0APS%20An%20Zm91cic%0D%0A%207"
          "%20",
            true,
            "text/javascript",
            "US-ASCII",
            "d4 = 'four';" },

        // Only unescaped whitespace should be stripped in non-base64.
        // http://b/1157796
        { "data:img/png,A  B  %20  %0A  C",
            true,
            "img/png",
            "US-ASCII",
            "AB \nC" },

        { "data:text/plain;charset=utf-8;base64,SGVsbMO2",
            true,
            "text/plain",
            "utf-8",
            "Hell\xC3\xB6" },

        // Not sufficiently padded.
        { "data:;base64,aGVsbG8gd29ybGQ",
            true,
            "text/plain",
            "US-ASCII",
            "hello world" },

        // Bad encoding (truncated).
        { "data:;base64,aGVsbG8gd29yb",
            false,
            "",
            "",
            "" },

        // BiDi control characters should be unescaped and preserved as is, and
        // should not be replaced with % versions. In the below case, \xE2\x80\x8F
        // is the RTL mark and the parsed text should preserve it as is.
        {
            "data:text/plain;charset=utf-8,\xE2\x80\x8Ftest",
            true,
            "text/plain",
            "utf-8",
            "\xE2\x80\x8Ftest" },

        // Same as above but with Arabic text after RTL mark.
        {
            "data:text/plain;charset=utf-8,"
            "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1",
            true,
            "text/plain",
            "utf-8",
            "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1" },

        // RTL mark encoded as %E2%80%8F should be unescaped too. Note that when
        // wrapped in a GURL, this URL and the next effectively become the same as
        // the previous two URLs.
        {
            "data:text/plain;charset=utf-8,%E2%80%8Ftest",
            true,
            "text/plain",
            "utf-8",
            "\xE2\x80\x8Ftest" },

        // Same as above but with Arabic text after RTL mark.
        {
            "data:text/plain;charset=utf-8,"
            "%E2%80%8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1",
            true,
            "text/plain",
            "utf-8",
            "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1" }

        // TODO(darin): add more interesting tests
    };

    for (size_t i = 0; i < arraysize(tests); ++i) {
        // Attach the input URL to any failure reported in this iteration so the
        // offending table row can be identified from the test output.
        SCOPED_TRACE(tests[i].url);

        // Fresh output strings for every row; Parse() fills them in.
        std::string mime_type;
        std::string charset;
        std::string data;
        bool ok = DataURL::Parse(GURL(tests[i].url), &mime_type, &charset, &data);
        EXPECT_EQ(ok, tests[i].is_valid);

        // The expected outputs are only meaningful for rows that should parse.
        if (tests[i].is_valid) {
            EXPECT_EQ(tests[i].mime_type, mime_type);
            EXPECT_EQ(tests[i].charset, charset);
            EXPECT_EQ(tests[i].data, data);
        }
    }
}

} // namespace net
